Chapter 6 Functional analyses

6.1 Data preparation

tss <- function(abund){sweep(abund, 2, colSums(abund), FUN="/")} 

genome_counts <- genome_counts %>%
  column_to_rownames(var="genome")

#Get list of present MAGs
present_MAGs <- genome_counts %>%
  filter(rowSums(.[, -1]) != 0) %>%
  rownames()

#Align distillr annotations with present MAGs and remove all-zero and all-one traits
present_MAGs <- present_MAGs[present_MAGs %in% rownames(genome_gifts)]
genome_gifts_filt <- genome_gifts[present_MAGs,] %>%
  select_if(~!all(. == 0)) %>%  #remove all-zero modules
  select_if(~!all(. == 1)) #remove all-one modules

GIFTs_elements <- to.elements(genome_gifts_filt,GIFT_db)

#Aggregate element-level GIFTs into the function level
GIFTs_functions <- to.functions(GIFTs_elements,GIFT_db)

#Aggregate function-level GIFTs into overall Biosynthesis, Degradation and Structural GIFTs and get overall metabolic capacity indices per MAG (at the domain level)
GIFTs_domains <- to.domains(GIFTs_functions,GIFT_db) %>% as.data.frame() %>%
  mutate(Overall=rowMeans(select(.,Biosynthesis,Structure,Degradation), na.rm=TRUE))

# #Get overall metabolic capacity indices per MAG (at the domain level)
# rowMeans(GIFTs_functions) # averaged at the function level (each function is weighed equally)
# rowMeans(GIFTs_domains) # averaged at the domain level (each domain is weighed equally)

#Get community-weighed average GIFTs per sample
# GIFTs_elements_community <- to.community(GIFTs_elements,genome_counts,GIFT_db)
# GIFTs_functions_community <- to.community(GIFTs_functions,genome_counts,GIFT_db)
# GIFTs_domains_community <- to.community(GIFTs_domains,genome_counts,GIFT_db)


GIFTs_elements_community <- genome_counts %>%
  tss() %>%
  to.community(GIFTs_elements,.,GIFT_db)

GIFTs_functions_community <- genome_counts %>%
  tss() %>%
  to.community(GIFTs_functions,.,GIFT_db)

GIFTs_domains_community <- genome_counts %>%
  tss() %>%
  to.community(GIFTs_domains,.,GIFT_db)
GIFTs_elements_community %>%
  rowMeans() %>%
  as_tibble(., rownames = "sample") %>%
  left_join(sample_metadata, by = join_by(sample == sample))  %>%
  group_by(species) %>%
  summarise(MCI = mean(value), sd = sd(value))
# A tibble: 2 × 3
  species                MCI     sd
  <chr>                <dbl>  <dbl>
1 Sciurus carolinensis 0.297 0.0108
2 Sciurus vulgaris     0.327 0.0302
GIFTs_functions_community %>%
  rowMeans() %>%
  as_tibble(., rownames = "sample") %>%
  left_join(sample_metadata, by = join_by(sample == sample))  %>%
  group_by(species) %>%
  summarise(MCI = mean(value), sd = sd(value))
# A tibble: 2 × 3
  species                MCI     sd
  <chr>                <dbl>  <dbl>
1 Sciurus carolinensis 0.311 0.0117
2 Sciurus vulgaris     0.327 0.0375
GIFTs_domains_community %>%
  rowMeans() %>%
  as_tibble(., rownames = "sample") %>%
  left_join(sample_metadata, by = join_by(sample == sample))  %>%
  group_by(species) %>%
  summarise(MCI = mean(value), sd = sd(value))
# A tibble: 2 × 3
  species                MCI     sd
  <chr>                <dbl>  <dbl>
1 Sciurus carolinensis 0.325 0.0139
2 Sciurus vulgaris     0.324 0.0512
merge_gift <- GIFTs_domains_community %>% 
  as.data.frame() %>% 
  rownames_to_column(., "sample") %>% 
  merge(., sample_metadata, by="sample")

6.2 Genome-specific GIFT profiles

GIFTs_elements %>%
  as_tibble(., rownames = "MAG") %>%
  reshape2::melt() %>%
  rename(Code_element = variable, GIFT = value) %>%
  inner_join(GIFT_db,by="Code_element") %>%
  ggplot(., aes(x=Element, y=MAG, fill=GIFT, group=Function))+
    geom_tile()+
    scale_y_discrete(guide = guide_axis(check.overlap = TRUE))+
    scale_x_discrete(guide = guide_axis(check.overlap = TRUE))+
    scale_fill_gradientn(limits = c(0,1), colours=brewer.pal(7, "YlGnBu"))+
    facet_grid(. ~ Function, scales = "free", space = "free")+
    theme_grey(base_size=8)+
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),strip.text.x = element_text(angle = 90))

6.3 Element-level community-averaged GIFT profiles

# GIFTs_elements_community %>%
#   reshape2::melt() %>%
#   rename(sample = Var1, Code_element = Var2, GIFT = value) %>%
#   left_join(sample_metadata, by = join_by(sample == sample)) %>%
#   left_join(GIFT_db,by="Code_element") %>%
#   ggplot(., aes(x=Code_element, y=sample, fill=GIFT))+
#     geom_tile()+
#     scale_y_discrete(guide = guide_axis(check.overlap = TRUE))+
#     scale_x_discrete(guide = guide_axis(check.overlap = TRUE))+
#     scale_fill_gradientn(colours=brewer.pal(9, "YlGnBu"))+
#     theme_grey(base_size=8)+
#     theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
#           strip.text.x = element_text(angle = 90), 
#           axis.text.y = element_blank()) +
#   facet_grid(species ~ Function, scales = "free", space = "free")

GIFTs_elements_community %>%
  reshape2::melt() %>%
  rename(sample = Var1, Code_element = Var2, GIFT = value) %>%
  left_join(sample_metadata, by = join_by(sample == sample)) %>%
  left_join(GIFT_db,by="Code_element") %>%
  ggplot(., aes(y=Element, x=sample, fill=GIFT))+
    geom_tile()+
    scale_y_discrete(guide = guide_axis(check.overlap = TRUE))+
    scale_x_discrete(guide = guide_axis(check.overlap = TRUE))+
    scale_fill_gradientn(colours=brewer.pal(9, "YlGnBu"))+
    theme_grey(base_size=8)+
    theme(,
          strip.text.y = element_text(angle = 0), 
          axis.text.x = element_blank()) +
  facet_grid( Function ~ species , scales = "free", space = "free")

6.4 Function-level community-averaged GIFT profiles

# sample_sort <- sample_table %>%
#   select(sample,species,Area_type) %>%
#   arrange(species,Area_type) %>%
#   pull()
   
GIFTs_functions_community %>%
  reshape2::melt() %>%
  rename(sample = Var1, Code_function = Var2, GIFT = value) %>%
  left_join(GIFT_db,by = join_by(Code_function == Code_function)) %>%
  left_join(sample_metadata, by = join_by(sample == sample)) %>%
  #mutate(Function=factor(Function, levels = rev(unique(Function)))) %>%
  ggplot(., aes(y=Function, x=sample, fill=GIFT))+
    geom_tile()+
    scale_y_discrete(guide = guide_axis(check.overlap = TRUE))+
    scale_x_discrete(guide = guide_axis(check.overlap = TRUE))+
    scale_fill_gradientn(colours=brewer.pal(7, "YlGnBu"))+
    facet_grid(~species, scales="free") +
    theme_grey(base_size=8)+
    theme(axis.text.y = element_text(angle = 0, vjust = 0.5, hjust=1), 
          axis.text.x = element_blank(),
          strip.text.x = element_text(angle = 0))

species.df <- sample_metadata %>%
  select(sample, species, animal)

 GIFTs_functions_community %>%
  as.data.frame() %>%
  rownames_to_column("sample") %>%
  left_join(., species.df, by = join_by(sample == sample)) %>%
  select(-animal) %>%
  pivot_longer(-c(sample, species), names_to = "trait", values_to = "value") %>%
  mutate(trait = case_when(
      trait %in% GIFT_db$Code_function ~ GIFT_db$Function[match(trait, GIFT_db$Code_function)], TRUE ~ trait)) %>%
  mutate(trait=factor(trait,levels=unique(GIFT_db$Function))) %>%
  ggplot(aes(x=value, y=species, group=species, fill=species, color=species)) +
    geom_boxplot() +
    scale_color_manual(name="species",
          breaks=c("Sciurus carolinensis","Sciurus vulgaris"),
          labels=c("Sciurus carolinensis","Sciurus vulgaris"),
          values=c("#999999", "#cc3333")) +
      scale_fill_manual(name="species",
          breaks=c("Sciurus carolinensis","Sciurus vulgaris"),
          labels=c("Sciurus carolinensis","Sciurus vulgaris"),
          values=c("#bfbfbf", "#db7070")) +
    facet_grid(trait ~ ., space="free", scales="free") +
              theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
                    axis.text.y = element_blank(),
                    strip.text.y = element_text(angle = 0)) + 
        labs(y="Traits",x="Metabolic capacity index")

# functions by host species and urbanization
GIFTs_functions_community %>%
  reshape2::melt() %>%
  rename(sample = Var1, Code_function = Var2, GIFT = value) %>%
  left_join(GIFT_db,by = join_by(Code_function == Code_function)) %>%
  left_join(sample_metadata, by = join_by(sample == sample)) %>%
  #mutate(sample=factor(Sample, levels = sample_sort)) %>%
  ggplot(., aes(y=Function, x=area_type, fill=GIFT))+
    geom_tile()+
    scale_y_discrete(guide = guide_axis(check.overlap = TRUE))+
    scale_x_discrete(guide = guide_axis(check.overlap = TRUE))+
    scale_fill_gradientn(colours=brewer.pal(9, "YlGnBu"))+
    facet_grid(~species, scales="free") +
    theme_grey(base_size=8)+
    theme(axis.text.x = element_text(angle = 0, vjust = 0, hjust=0.5), 
          axis.text.y = element_text(),
          strip.text.x = element_text(angle = 0))

# functions by host species and seasons
sample_metadata$season <-factor(sample_metadata$season, levels = c("spring-summer", "autumn", "winter"))
GIFTs_functions_community %>%
  reshape2::melt() %>%
  rename(sample = Var1, Code_function = Var2, GIFT = value) %>%
  left_join(GIFT_db,by = join_by(Code_function == Code_function)) %>%
  left_join(sample_metadata, by = join_by(sample == sample)) %>%
  #mutate(sample=factor(Sample, levels = sample_sort)) %>%
  ggplot(., aes(y=Function, x=season, fill=GIFT))+
    geom_tile()+
    scale_y_discrete(guide = guide_axis(check.overlap = TRUE))+
    scale_x_discrete(guide = guide_axis(check.overlap = TRUE))+
    scale_fill_gradientn(colours=brewer.pal(9, "YlGnBu"))+
    facet_grid(~species, scales="free") +
    theme_grey(base_size=8)+
    theme(axis.text.x = element_text(angle = 0, vjust = 0, hjust=0.5), 
          axis.text.y = element_text(),
          strip.text.x = element_text(angle = 0))

6.5 Domain-level community-averaged GIFT profiles

#Biosynthesis by species
biosynth.species <- merge_gift %>%
  ggboxplot(., x = "species", y = "Biosynthesis", color = "species", fill="white", add="jitter") +
      scale_color_manual(values=squirrel_colors) +
      scale_fill_manual(values=squirrel_colors) +
      stat_compare_means() +
      theme_classic() +
      labs(y = "Biosynthesis") +
      theme(
        legend.position = "none",
        axis.title.x = element_blank())

#Degradation by species
degradation.species <- merge_gift %>%
  ggboxplot(., x = "species", y = "Degradation", color = "species", fill="white", add="jitter") +
      scale_color_manual(values=squirrel_colors) +
      scale_fill_manual(values=squirrel_colors) +
      stat_compare_means() +
      theme_classic() +
      labs(y = "Degradation") +
      theme(
        legend.position = "none",
        axis.title.x = element_blank())

# #Structure by species
# structure.species <- merge_gift %>%
#   ggboxplot(., x = "species", y = "Structure", color = "species", fill="white", add="jitter") +
#       scale_color_manual(values=squirrel_colors) +
#       scale_fill_manual(values=squirrel_colors) +
#       stat_compare_means() +
#       theme_classic() +
#       labs(y = "Structure") +
#       theme(
#         legend.position = "none",
#         axis.title.x = element_blank())
# 
# #Overall by species
# overall.species <- merge_gift %>%
#   ggboxplot(., x = "species", y = "Overall", color = "species", fill="white", add="jitter") +
#       scale_color_manual(values=squirrel_colors) +
#       scale_fill_manual(values=squirrel_colors) +
#       stat_compare_means() +
#       theme_classic() +
#       labs(y = "Overall") +
#       theme(
#         legend.position = "none",
#         axis.title.x = element_blank())


sp.legend <- get_legend(biosynth.species)


ggarrange(biosynth.species, degradation.species, #+ rremove("x.text"), 
          legend.grob = sp.legend, legend="bottom", common.legend = TRUE,
          #labels = c("A", "B", "C"),
          ncol = 2, nrow = 1)

### Differences in bacterial functional capacity
#grid.arrange(arrangeGrob(p1, p5,p3, p4, ncol = 2))
merge_gift$area_type <-factor(merge_gift$area_type, levels = c("rural", "suburban", "urban"))

#Biosynthesis by species*area_type
biosynth.area <- merge_gift %>%
  ggboxplot(., x = "species", y = "Biosynthesis", color = "area_type", fill="white", add="jitter") +
  scale_color_manual(values=area_colors) +
  scale_fill_manual(values=area_colors) +
  stat_compare_means() +
  theme_classic() +
  labs(y = "Biosynthesis") +
  theme(
    legend.position = "right",
    legend.box = "vertical",
    axis.title.x = element_blank()) +
  guides(color=guide_legend(title="Area type"), fill="none")

#Degradation by species*area_type
degradation.area <- merge_gift %>%
  ggboxplot(., x = "species", y = "Degradation", color = "area_type", fill="white", add="jitter") +
  scale_color_manual(values=area_colors) +
  scale_fill_manual(values=area_colors) +
  stat_compare_means() +
  theme_classic() +
  labs(y = "Degradation") +
  theme(
    legend.position = "right",
    legend.box = "vertical",
    axis.title.x = element_blank()) +
  guides(color=guide_legend(title="Area type"), fill="none")

# #Structure by species*area_type
# structure.area <- merge_gift %>%
#   ggboxplot(., x = "species", y = "Structure", color = "area_type", fill="white", add="jitter") +
#   scale_color_manual(values=area_colors) +
#   scale_fill_manual(values=area_colors) +
#   stat_compare_means() +
#   theme_classic() +
#   labs(y = "Structure") +
#   theme(
#     legend.position = "right",
#     legend.box = "vertical",
#     axis.title.x = element_blank()) +
#   guides(color=guide_legend(title="Area type"), fill="none")
# 
# #Overall by species*area_type
# overall.area <- merge_gift %>%
#   ggboxplot(., x = "species", y = "Overall", color = "area_type", fill="white", add="jitter") +
#   scale_color_manual(values=area_colors) +
#   scale_fill_manual(values=area_colors) +
#   stat_compare_means() +
#   theme_classic() +
#   labs(y = "Overall") +
#   theme(
#     legend.position = "right",
#     legend.box = "vertical",
#     axis.title.x = element_blank()) +
#   guides(color=guide_legend(title="Area type"), fill="none")

area.legend <- get_legend(biosynth.area)


ggarrange(biosynth.area, degradation.area, #+ rremove("x.text"), 
          legend.grob = area.legend, legend="right", common.legend = TRUE,
          #labels = c("A", "B", "C"),
          ncol = 2, nrow = 1)

merge_gift$season <-factor(merge_gift$season, levels = c("spring-summer", "autumn", "winter"))

#Biosynthesis by species*season
biosynth.season <- merge_gift %>%
  ggboxplot(., x = "species", y = "Biosynthesis", color = "season", fill="white", add="jitter") +
  scale_color_manual(values=season_colors) +
  scale_fill_manual(values=season_colors) +
  stat_compare_means() +
  theme_classic() +
  labs(y = "Biosynthesis") +
  theme(
    legend.position = "right",
    legend.box = "vertical",
    axis.title.x = element_blank()) +
  guides(color=guide_legend(title="Season"), fill="none")

#Degradation by species*season
degradation.season <- merge_gift %>%
  ggboxplot(., x = "species", y = "Degradation", color = "season", fill="white", add="jitter") +
  scale_color_manual(values=season_colors) +
  scale_fill_manual(values=season_colors) +
  stat_compare_means() +
  theme_classic() +
  labs(y = "Degradation") +
  theme(
    legend.position = "right",
    legend.box = "vertical",
    axis.title.x = element_blank()) +
  guides(color=guide_legend(title="Season"), fill="none")

# #Structure by species*season
# structure.season <- merge_gift %>%
#   ggboxplot(., x = "species", y = "Structure", color = "season", fill="white", add="jitter") +
#   scale_color_manual(values=season_colors) +
#   scale_fill_manual(values=season_colors) +
#   stat_compare_means() +
#   theme_classic() +
#   labs(y = "Structure") +
#   theme(
#     legend.position = "right",
#     legend.box = "vertical",
#     axis.title.x = element_blank()) +
#   guides(color=guide_legend(title="Season"), fill="none")
# 
# #Overall by species*season
# overall.season <- merge_gift %>%
#   ggboxplot(., x = "species", y = "Overall", color = "season", fill="white", add="jitter") +
#   scale_color_manual(values=season_colors) +
#   scale_fill_manual(values=season_colors) +
#   stat_compare_means() +
#   theme_classic() +
#   labs(y = "Overall") +
#   theme(
#     legend.position = "right",
#     legend.box = "vertical",
#     axis.title.x = element_blank()) +
#   guides(color=guide_legend(title="Season"), fill="none")

season.legend <- get_legend(biosynth.season)


ggarrange(biosynth.season, degradation.season, #+ rremove("x.text"), 
          legend.grob = season.legend, legend="right", common.legend = TRUE,
          #labels = c("A", "B", "C"),
          ncol = 2, nrow = 1)

6.6 KEGG network analysis

genome_annotations <- read_tsv("data/genome_annotations.tsv.gz") %>%
    rename(gene=1, genome=2, contig=3)
genome_kegg <- genome_annotations %>% 
  group_by(genome) %>%
  summarise(kegg_id_list = list(unique(kegg_id[kegg_id != "NA"])))
genome_counts <- genome_counts %>%
  rownames_to_column(., var="genome")

community_kegg <-genome_kegg %>% 
  inner_join(genome_counts,by="genome") %>% 
  pivot_longer(cols = starts_with("EHI"), names_to = "sample", values_to = "abundance") %>%
  filter(abundance != 0) %>%
  select(-abundance) %>% 
  unnest(kegg_id_list) %>% 
  group_by(sample) %>%
  summarise(unique_kegg_ids = list(unique(kegg_id_list)))
species_kegg <- community_kegg %>% 
  inner_join(sample_metadata,by="sample") %>%
  select(sample,unique_kegg_ids,species) %>% 
  unnest(unique_kegg_ids) %>% 
  group_by(species, unique_kegg_ids) %>%
  summarise(count = n(), .groups = "drop")

red_ipath <- species_kegg %>% 
  filter(species=="Sciurus vulgaris") %>% 
  select(-species) %>%
  mutate(color="#f54257") %>% 
  mutate(width=str_c("W",round(count/max(count)*20,0))) %>% 
  mutate(ipath=str_c(unique_kegg_ids," ",color," ",width))
  
write_tsv(red_ipath %>% select(ipath),"data/red_ipath.tsv",col_names=F)

grey_ipath <- species_kegg %>% 
  filter(species=="Sciurus carolinensis") %>% 
  select(-species) %>%
  mutate(color="#000000") %>% 
  mutate(width=str_c("W",round(count/max(count)*20,0))) %>% 
  mutate(ipath=str_c(unique_kegg_ids," ",color," ",width))
  
write_tsv(grey_ipath %>% select(ipath),"data/grey_ipath.tsv",col_names=F)  


# Which host species each path can be found in
species_kegg2<- species_kegg %>%
  group_by(unique_kegg_ids) %>%
  mutate(host = if_else(all(species == "Sciurus vulgaris"), "only red",
                        if_else(all(species == "Sciurus carolinensis"), "only grey", "both"))) %>%
  mutate(count = sum(count)) %>%
  select(-species) %>%
  distinct() %>%
  ungroup()

both_ipath <- species_kegg2 %>% 
  mutate(color= if_else(host == "only red", "#f54257",
                        if_else(host == "only grey", "#000000", "#50c878"))) %>% 
  mutate(width=str_c("W",round(count/max(count)*20,0))) %>% 
  mutate(ipath=str_c(unique_kegg_ids," ",color," ",width))
  
write_tsv(both_ipath %>% select(ipath),"data/both_ipath.tsv",col_names=F)

The following figures were produced using the web tool ipath3:

(#fig:red_png)Red squirrel’s microbiota metabolic pathways

Red squirrel's microbiota metabolic pathways

(#fig:grey_png)Grey squirrel’s microbiota metabolic pathways

Grey squirrel's microbiota metabolic pathways

(#fig:both_png)Squirrels’ microbiota metabolic pathways - green=both species; red=only red squirrel; black=only grey squirrel

Squirrels' microbiota metabolic pathways - green=both species; red=only red squirrel; black=only grey squirrel